package com.lagou.cluster.config;

import java.util.Queue;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.scheduler.MonitorableScheduler;

/**
 * Base scheduler that keeps pending requests in a caller-supplied queue and
 * remembers the URLs it has already seen in a caller-supplied set.
 *
 * <p>Both collections are injected through the constructor, so their concrete
 * behaviour (ordering, capacity, sharing between instances, ...) is decided by
 * whoever constructs the subclass. None of the methods here use the
 * {@link Task} argument.
 */
public abstract class AbstractScheduler implements MonitorableScheduler {

	/** URLs that have been offered to {@link #isDuplicate(Request)} so far. */
	private final Set<String> history;

	/** Requests waiting to be handed out by {@link #poll(Task)}. */
	private final Queue<Request> queue;

	/** Logger named after the concrete subclass. */
	protected Logger logger = LoggerFactory.getLogger(getClass());

	/**
	 * Creates a scheduler backed by the given collections.
	 *
	 * @param queue   storage for pending requests
	 * @param history storage for the URLs already seen
	 */
	public AbstractScheduler(Queue<Request> queue,
			Set<String> history) {
		this.queue = queue;
		this.history = history;
	}

	/**
	 * Reports how many distinct URLs have been recorded in the history set.
	 *
	 * @param task ignored
	 * @return current size of the history set
	 */
	@Override
	public int getTotalRequestsCount(Task task) {
		return history.size();
	}

	/**
	 * Checks whether the request's URL has been seen before.
	 *
	 * <p>This is not a pure query: the URL is added to the history set as part
	 * of the check, so a second call with the same URL reports a duplicate.
	 *
	 * @param req request whose URL is tested
	 * @return {@code true} if the URL was already in the history set,
	 *         {@code false} if it was newly recorded by this call
	 */
	public boolean isDuplicate(Request req) {
		boolean newlyRecorded = this.history.add(req.getUrl());
		return !newlyRecorded;
	}

	/**
	 * Forgets every URL recorded so far. The pending queue is left untouched.
	 */
	public void clearHistory() {
		history.clear();
	}

	/**
	 * Removes and returns the next pending request.
	 *
	 * @param task ignored
	 * @return whatever the underlying queue's {@code poll()} yields
	 *         ({@code null} for an empty queue, per the {@link Queue} contract)
	 */
	@Override
	public Request poll(Task task) {
		return this.queue.poll();
	}

	/**
	 * Reports how many requests are still waiting in the queue.
	 *
	 * @param task ignored
	 * @return current size of the pending queue
	 */
	@Override
	public int getLeftRequestsCount(Task task) {
		return queue.size();
	}

	/**
	 * Offers a request to the pending queue unless it is a duplicate that
	 * {@link #shouldReserved(Request)} does not want to keep.
	 *
	 * <p>The duplicate check always runs first and therefore always records the
	 * URL in the history set; {@code shouldReserved} is consulted only when the
	 * URL turned out to be a duplicate.
	 *
	 * @param request request to enqueue
	 * @param task    ignored
	 */
	@Override
	public void push(Request request, Task task) {
		logger.trace("get a candidate url {}", request.getUrl());

		// Drop only requests that are duplicates AND not explicitly reserved.
		if (isDuplicate(request) && !shouldReserved(request)) {
			return;
		}

		logger.debug("push to queue {}", request.getUrl());
		queue.offer(request);
	}

	/**
	 * Decides whether a request must be queued even though its URL is already
	 * in the history set.
	 *
	 * <p>The default keeps any request carrying a non-null extra under
	 * {@code Request.CYCLE_TRIED_TIMES} (presumably set by the framework when a
	 * request is being retried; confirm against WebMagic).
	 *
	 * @param request request being pushed
	 * @return {@code true} if the request should bypass duplicate removal
	 */
	protected boolean shouldReserved(Request request) {
		Object triedTimes = request.getExtra(Request.CYCLE_TRIED_TIMES);
		return triedTimes != null;
	}
}